library(tidyverse)
library(janitor)
library(rsprite2)
library(scrutiny)
library(purrr)
library(patchwork)
library(knitr)
library(kableExtra)
min_decimals <- function(x, digits = 2) {
sprintf(paste0("%.", digits, "f"), x)
}
plot_helper <- function(dat, color_bounds = TRUE){
dat <- dat |>
mutate(label = case_when(mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
TRUE ~ "Possible value flagged as possible"))
if(color_bounds == FALSE){
# only GRIM-consistent values
p_grim <- dat |>
filter(grim) |>
ggplot(aes(mean, sd)) +
geom_point(shape = 15, size = 0.5, color = "grey35") +
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
scale_color_viridis_d(begin = 0.3, end = 0.7) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM consistent values")
# only GRIM and GRIMMER-consistent values
p_grimmer <- dat |>
filter(grim & grimmer) |>
ggplot(aes(mean, sd)) +
geom_point(shape = 15, size = 0.5, color = "grey35") +
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
scale_color_viridis_d(begin = 0.3, end = 0.7) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM + GRIMMER consistent values")
# only GRIM and GRIMMER and TIDES-consistent values
p_tides <- dat |>
filter(grim & grimmer & tides) |>
ggplot(aes(mean, sd)) +
geom_point(shape = 15, size = 0.5, color = "grey35") +
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM + GRIMMER + TIDES consistent values")
return(p_grim +
p_grimmer +
p_tides +
plot_layout(ncol = 1))
}
if(color_bounds){
# with coloring of impossible values
# only GRIM-consistent values with colors
p_temp_with_colors <- dat |>
filter(grim) |>
# mutate(label = case_when(grim == FALSE ~ "Impossible value flagged as impossible",
# mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
# sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
# TRUE ~ "Possible value flagged as possible")) |>
ggplot(aes(mean, sd, color = label)) +
geom_point(shape = 15, size = 0.5) + # "grey20"
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
scale_color_viridis_d(begin = 0.3, end = 0.7) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM consistent values") +
theme(legend.position = "top",
legend.direction = "vertical") +
guides(color = guide_legend(override.aes = list(size = 4, ncol = 1), title = NULL))
# Extract legend grob - magic chatGPT code no idea how it works
legend_grob <- ggplotGrob(p_temp_with_colors)$grobs[[which(sapply(ggplotGrob(p_temp_with_colors)$grobs, function(x) x$name) == "guide-box")]]
# Convert the legend grob into a patchwork-compatible object
legend <- wrap_elements(grid::grobTree(legend_grob))
# grim plot without legend
p_grim_with_colors <- p_temp_with_colors +
theme(legend.position = "none")
# only GRIM and GRIMMER-consistent values with colors
p_grimmer_with_colors <- dat |>
filter(grim & grimmer) |>
# mutate(label = case_when(grim == FALSE | grimmer == FALSE ~ "Impossible value flagged as impossible",
# mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
# sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
# TRUE ~ "Possible value flagged as possible")) |>
ggplot(aes(mean, sd, color = label)) +
geom_point(shape = 15, size = 0.5) +
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
scale_color_viridis_d(begin = 0.3, end = 0.7) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM + GRIMMER consistent values") +
theme(legend.position = "none")
# only GRIM and GRIMMER and TIDES-consistent values with colors
p_tides_with_colors <- dat |>
filter(grim & grimmer & tides) |>
# mutate(label = case_when(grim == FALSE | grimmer == FALSE | tides == FALSE ~ "Impossible value flagged as impossible",
# mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
# sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
# TRUE ~ "Possible value flagged as possible")) |>
ggplot(aes(mean, sd, color = label)) +
geom_point(shape = 15, size = 0.5) +
theme_linedraw() +
scale_y_continuous(breaks = scales::breaks_pretty(n = 8),
limit = c(0, 4),
expand = c(0.01, 0.01)) +
scale_x_continuous(breaks = scales::breaks_pretty(n = 7),
limit = c(0.5, 7.5),
expand = c(0.01, 0.01)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, direction = -1) +
ylab("Standard Deviation") +
xlab("Mean") +
ggtitle("GRIM + GRIMMER + TIDES consistent values") +
theme(legend.position = "none")
return(legend +
p_grim_with_colors +
p_grimmer_with_colors +
p_tides_with_colors +
plot_layout(ncol = 1, heights = c(.1, 1, 1, 1)))
}
}
# proportion of possible values, considering only those where mean is within the scale bounds and SD is within Croucher's (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
table_helper <- function(dat, max_sd){
dat_feasible <- dat |>
filter(mean >= 1 & mean <= 7 & sd >= 0 & sd <= max_sd)
data.frame(check = c("GRIM", "GRIM+GRIMMER", "GRIM+GRIMMER+TIDES"),
n_total = dat_feasible |>
count(name = "n_total"),
n_possible = c(dat_feasible |>
filter(grim) |>
count() |>
pull(n),
dat_feasible |>
filter(grim, grimmer) |>
count() |>
pull(n),
dat_feasible |>
filter(grim, grimmer, tides) |>
count() |>
pull(n))) |>
mutate(proportion = janitor::round_half_up(n_possible/n_total, 3)) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)
}tides_modified_from_.sd_limits <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
if (is.null(sd_prec)) {
sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
}
result <- c(-Inf, Inf)
aMax <- min_val
aMin <- floor(mean*n_items)/n_items
bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val) # Adjusted here
bMin <- min(aMin + 1/n_items, max_val) # Adjusted here
total <- round(mean * n_obs * n_items)/n_items
poss_values <- max_val
for (i in seq_len(n_items)) {
poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
}
poss_values <- sort(poss_values)
for (abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))) {
a <- abm[1]
b <- abm[2]
m <- abm[3]
# Adjust a and b to be within min_val and max_val
a <- min(max(a, min_val), max_val)
b <- min(max(b, min_val), max_val)
if (a == b) {
vec <- rep(a, n_obs)
} else {
k <- round((total - (n_obs * b)) / (a - b))
k <- min(max(k, 1), n_obs - 1)
vec <- c(rep(a, k), rep(b, n_obs - k))
diff <- sum(vec) - total
if ((diff < 0)) {
vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
} else if ((diff > 0)) {
vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
}
}
# instead of throwing errors here, return NA
# if (round(mean(vec), sd_prec) != round(mean, sd_prec) | !all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
# stop("Error in calculating range of possible standard deviations")
# }
# result[m] <- round(sd(vec), sd_prec)
# Check if the calculated mean and values match expected conditions
if (round(mean(vec), sd_prec) == round(mean, sd_prec) & all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
result[m] <- round(sd(vec), sd_prec)
}
}
# Replace Inf or -Inf with NA
result[is.infinite(result)] <- NA
# returns df instead of vector
res <-
data.frame(sd_min = result[1],
sd_max = result[2]) |>
mutate(tides = case_when(sd < sd_min ~ FALSE,
is.na(sd_min) ~ FALSE,
sd > sd_max ~ FALSE,
is.na(sd_max) ~ FALSE,
TRUE ~ TRUE))
return(res)
}
# dat_n_11 <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 11,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# mutate(tides = pmap(list(n_obs, mean, sd, min_val, max_val, sd_prec, n_items),
# tides_modified_from_.sd_limits)) |>
# unnest(grim) |>
# unnest(grimmer) |>
# unnest(tides)
#
# dat_n_100 <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 100,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# mutate(tides = pmap(list(n_obs, mean, sd, min_val, max_val, sd_prec, n_items),
# tides_modified_from_.sd_limits)) |>
# unnest(grim) |>
# unnest(grimmer) |>
# unnest(tides)
#
# write_rds(dat_n_11, "results_grim_grimmer_tides_n_11.rds")
# write_rds(dat_n_100, "results_grim_grimmer_tides_n_100.rds")
dat_n_11 <- read_rds("results_grim_grimmer_tides_n_11.rds")
dat_n_100 <- read_rds("results_grim_grimmer_tides_n_100.rds")| check | n_total | n_possible | proportion |
|---|---|---|---|
| GRIM | 216961 | 43681 | 0.201 |
| GRIM+GRIMMER | 216961 | 12797 | 0.059 |
| GRIM+GRIMMER+TIDES | 216961 | 3335 | 0.015 |
min_sd <- function(mean, n, min_score, max_score, integer_responses = TRUE) {
# ---- 1) required total sum (rounded to an integer if the problem implies
# exact feasibility)
required_sum <- n * mean
# round required_sum if the problem implies the mean is exactly representable
# by integer responses:
if(integer_responses){
required_sum <- round(required_sum)
}
# feasibility check: the sum must lie between n*min_score and n*max_score
# for an integer distribution to exist.
if (required_sum < n*min_score || required_sum > n*max_score) {
stop("No distribution of [min_score, max_score] integers can achieve that mean (sum out of range).")
}
# ---- 2) check if the mean is effectively an integer within [min_score,max_score].
# if so, the minimal SD is 0 by taking all responses = that integer.
if (abs(required_sum - n*round(mean)) < 1e-9 && round(mean) >= min_score && round(mean) <= max_score) {
# i.e. if mean was effectively an integer in the feasible range
# check if n * round(mean) == required_sum
if (round(mean) * n == required_sum) {
# all responses equal to that integer
dist <- rep(round(mean), n)
return(data.frame(min_sd = 0))
}
}
# ---- 3) otherwise, we attempt to use two adjacent integers L and L+1
# let L = floor(mean), clamped to [min_score, max_score].
L_init <- floor(mean)
if (L_init < min_score) L_init <- min_score
if (L_init > max_score) L_init <- max_score
# we'll define a small helper to build a distribution given L
# and return if feasible:
build_dist_from_L <- function(L, required_sum, n, min_score, max_score) {
if (L < min_score || L > max_score) {
return(NULL)
}
# leftover = how many times we need (L+1)
leftover <- required_sum - n*L
if (leftover == 0) {
# all L
return(rep(L, n))
} else if (leftover > 0 && leftover <= n) {
# leftover data points (L+1), the rest are L
# but only if (L+1) <= max_score
if ((L + 1) <= max_score) {
return(c(rep(L, n - leftover), rep(L + 1, leftover)))
} else {
return(NULL)
}
} else {
# leftover < 0 or leftover > n => not feasible with L and L+1
return(NULL)
}
}
# try L_init directly:
dist <- build_dist_from_L(L_init, required_sum, n, min_score, max_score)
if (is.null(dist)) {
# if that didn't work, try adjusting L_init up or down by 1 step
# (sometimes floor(mean) is not the right choice if leftover < 0 or > n).
# let's define a small search around L_init:
candidates <- unique(c(L_init - 1, L_init, L_init + 1))
candidates <- candidates[candidates >= min_score & candidates <= max_score]
found <- FALSE
for (L_try in candidates) {
dist_try <- build_dist_from_L(L_try, required_sum, n, min_score, max_score)
if (!is.null(dist_try)) {
dist <- dist_try
found <- TRUE
break
}
}
if (!found) {
# possibly no solution with 2 adjacent integers => no feasible distribution
stop("Could not construct a minimal-variance distribution with two adjacent integers.")
}
}
# if we reach here, 'dist' is a valid distribution
# ---- 4) compute sample SD in R (with denominator n-1)
min_sd <- sd(dist)
data.frame(min_sd = min_sd)
}
max_sd <- function(mean, n, min_score, max_score, integer_responses = TRUE) {
# 1) required sum
required_sum <- n * mean
# check: is required_sum in feasible range [n*min_score, n*max_score]?
if (required_sum < n*min_score - 1e-9 || required_sum > n*max_score + 1e-9) {
stop("No integer distribution can achieve this mean with responses in [min_score,max_score].")
}
# round required_sum if the problem implies the mean is exactly
# representable by integer responses:
if(integer_responses){
required_sum <- round(required_sum)
}
# 2) solve for x
x <- floor((required_sum - n*min_score) / (max_score - min_score))
x <- max(0, min(x, n)) # keep it in [0, n]
# 3) leftover = required_sum - [x*max_score + (n-x)*min_score]
delta <- required_sum - (x*max_score + (n - x)*min_score)
if (delta < 0 || delta >= (max_score - min_score)) {
stop("Something went wrong with leftover. Check input.")
}
# 4) sum of squares around mean:
# part A: from x responses at max_score
ss_max_score <- x * (max_score - mean)^2
# part B: from (n - x - 1) responses at min_score (if we do have a leftover)
# or from (n - x) responses at min_score (if no leftover)
if (delta == 0) {
# no middle value
ss_min_score <- (n - x) * (min_score - mean)^2
ss_m <- 0
} else {
ss_min_score <- (n - x - 1) * (min_score - mean)^2
middle_val <- min_score + delta
ss_m <- (middle_val - mean)^2
}
# total sum of squares
ss_total <- ss_max_score + ss_min_score + ss_m
# sample variance = ss_total / (n - 1)
var_max <- ss_total / (n - 1)
sd_max <- sqrt(var_max)
data.frame(max_sd = sd_max)
}
# dat_n_11_new <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 11,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# unnest(grim) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# unnest(grimmer) |>
# mutate(tides_max = pmap(list(mean, n_obs, min_val, max_val),
# possibly(max_sd, otherwise = NA))) |>
# unnest(tides_max) |>
# mutate(tides_min = pmap(list(mean, n_obs, min_val, max_val),
# possibly(min_sd, otherwise = NA))) |>
# unnest(tides_min) |>
# mutate(tides = case_when(sd <= max_sd & sd >= min_sd ~ TRUE,
# TRUE ~ FALSE))
#
# dat_n_100_new <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 100,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# unnest(grim) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# unnest(grimmer) |>
# mutate(tides_max = pmap(list(mean, n_obs, min_val, max_val),
# possibly(max_sd, otherwise = NA))) |>
# unnest(tides_max) |>
# mutate(tides_min = pmap(list(mean, n_obs, min_val, max_val),
# possibly(min_sd, otherwise = NA))) |>
# unnest(tides_min) |> |>
# mutate(tides = case_when(sd <= round(max_sd, 2) & sd >= round(min_sd, 2) ~ TRUE,
# TRUE ~ FALSE))
#
# write_rds(dat_n_11_new, "results_grim_grimmer_tides_n_11_new.rds")
# write_rds(dat_n_100_new, "results_grim_grimmer_tides_n_100_new.rds")
dat_n_11_new <- read_rds("results_grim_grimmer_tides_n_11_new.rds")
dat_n_100_new <- read_rds("results_grim_grimmer_tides_n_100_new.rds")| check | n_total | n_possible | proportion |
|---|---|---|---|
| GRIM | 139631 | 27925 | 0.200 |
| GRIM+GRIMMER | 139631 | 6397 | 0.046 |
| GRIM+GRIMMER+TIDES | 139631 | 6035 | 0.043 |
NB added rounding for checks
min_sd <- function(mean, n) {
# determine the closest integer values around the mean
lower_value <- floor(mean)
upper_value <- ceiling(mean)
# calculate frequencies required to achieve the mean
total_sum <- n * mean
lower_count <- n * (upper_value - mean)
upper_count <- n * (mean - lower_value)
# if the calculated counts are not integers, round them appropriately
lower_count <- round(lower_count)
upper_count <- round(upper_count)
# ensure the total count equals n
if (lower_count + upper_count != n) {
lower_count <- n - upper_count
}
# form the combination of values
values <- c(rep(lower_value, lower_count), rep(upper_value, upper_count))
# calculate the mean and standard deviation
#actual_mean <- mean(values)
min_sd <- sd(values)
#return(list(actual_mean = actual_mean, min_sd = min_sd))
return(min_sd)
}
max_sd <- function(mean, n, min, max){
# handle edge cases where the mean is at the minimum or maximum
if (mean == min || mean == max) {
max_sd <- 0
} else {
# mirror values for special cases to ensure min < max
if (abs(min) > abs(max)) {
temp <- min
min <- max
max <- temp
}
# calculate the number of observations at the maximum value
num_max <- floor((n * mean - n * min) / (max - min))
# calculate the number of observations at the minimum value
num_min <- n - 1 - num_max
# adjust max if num_max is 0
if (num_max == 0) {
max <- 0
}
# compute the sum of means adjusted by the number of min and max values
adjusted_mean_sum <- n * mean - num_min * min - num_max * max
# compute maximum variability
max_variability <- (num_min * (min - mean)^2 + num_max * (max - mean)^2 + (mean - adjusted_mean_sum)^2) / (n - 1)
# calculate the maximum standard deviation
max_sd <- sqrt(max_variability)
}
return(max_sd)
}
tides_single <- function(mean, sd, n, min, max, calculate_min_sd = TRUE, verbose = FALSE){
# calculate the maximum standard deviation
max_sd <- max_sd(mean, n, min, max)
# calculate the minimum standard deviation using the provided function
if (calculate_min_sd) {
min_sd <- min_sd(mean, n)
} else {
min_sd <- NA
}
# standardize the mean and SD based on the max possible range
standardized_mean <- (mean - min) / (max - min)
max_standardized_sd <- ( sqrt( (standardized_mean) * (1 - (standardized_mean)) ) ) * ( sqrt(n / (n - 1)) )
standardized_sd <- sd / max_sd
# create the result tibble based on verbosity
if (verbose) {
res <- tibble(mean = mean,
sd = sd,
n = n,
min = min,
max = max,
standardized_mean = standardized_mean,
standardized_sd = standardized_sd,
max_standardized_sd = max_standardized_sd,
min_sd = round(min_sd, 2),
max_sd = round(max_sd, 2),
tides = case_when(sd > max_sd | (sd < min_sd | is.na(min_sd)) ~ FALSE,
sd <= max_sd & (sd >= min_sd | is.na(min_sd)) ~ TRUE))
} else if (!verbose){
res <- tibble(standardized_mean = standardized_mean,
standardized_sd = standardized_sd,
max_standardized_sd = max_standardized_sd,
min_sd = round(min_sd, 2),
max_sd = round(max_sd, 2),
tides = case_when(sd > max_sd | (sd < min_sd | is.na(min_sd)) ~ FALSE,
sd <= max_sd & (sd >= min_sd | is.na(min_sd)) ~ TRUE))
}
return(res)
}
# dat_n_11_package <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 11,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# unnest(grim) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# unnest(grimmer) |>
# mutate(tides = pmap(list(mean, sd, n_obs, min_val, max_val),
# possibly(tides_single, otherwise = NA))) |>
# unnest(tides)
#
# dat_n_100_package <-
# expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
# sd = seq(from = 0, to = 4, by = 0.01)) |>
# mutate(n_obs = 100,
# m_prec = 2,
# sd_prec = 2,
# n_items = 1,
# min_val = 1,
# max_val = 7) |>
# mutate(grim = pmap(list(as.character(mean), n_obs),
# grim)) |>
# unnest(grim) |>
# mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
# grimmer)) |>
# unnest(grimmer) |>
# mutate(tides = pmap(list(mean, sd, n_obs, min_val, max_val),
# possibly(tides_single, otherwise = NA))) |>
# unnest(tides)
#
# write_rds(dat_n_11_package, "results_grim_grimmer_tides_n_11_package.rds")
# write_rds(dat_n_100_package, "results_grim_grimmer_tides_n_100_package.rds")
dat_n_11_package <- read_rds("results_grim_grimmer_tides_n_11_package.rds")
dat_n_100_package <- read_rds("results_grim_grimmer_tides_n_100_package.rds")| check | n_total | n_possible | proportion |
|---|---|---|---|
| GRIM | 139589 | 27995 | 0.201 |
| GRIM+GRIMMER | 139589 | 6395 | 0.046 |
| GRIM+GRIMMER+TIDES | 139589 | 6041 | 0.043 |
# identical(dat_n_11, dat_n_11_new)
#
# all.equal(dat_n_11, dat_n_11_new)
#
# colnames(dat_n_11)
dat_n_11_combined <-
rename(dat_n_11,
max_sd_.sd_limits = sd_max,
min_sd_.sd_limits = sd_min,
tides_modified_from_.sd_limits = tides) |>
full_join(rename(dat_n_11_new,
max_sd_new = max_sd,
min_sd_new = min_sd,
tides_new = tides),
by = c("mean", "sd", "n_obs", "m_prec", "sd_prec", "n_items", "min_val", "max_val")) |>
full_join(rename(dat_n_11_package,
max_sd_package = max_sd,
min_sd_package = min_sd,
tides_package = tides),
by = c("mean", "sd", "n_obs", "m_prec", "sd_prec", "n_items", "min_val", "max_val")) |>
mutate(max_sd_new_rounded = round(max_sd_new, 2),
min_sd_new_rounded = round(min_sd_new, 2),
max_sd_package = case_when(is.nan(max_sd_package) ~ NA,
TRUE ~ max_sd_package),
# grim_match = grim.x == grim.y,
# grimmer_match = grimmer.x == grimmer.y,
# tides_match = tides_modified_from_.sd_limits == tides_new,
min_sd_match = case_when(min_sd_.sd_limits == min_sd_new_rounded & min_sd_new_rounded == min_sd_package ~ TRUE,
TRUE ~ FALSE),
max_sd_match = case_when(max_sd_.sd_limits == max_sd_new_rounded & max_sd_new_rounded == max_sd_package ~ TRUE,
TRUE ~ FALSE),
tides_match = case_when(tides_modified_from_.sd_limits == tides_new & tides_new == tides_package ~ TRUE,
TRUE ~ FALSE))
# dat_n_11_combined |>
# count(tides_match)
# dat_n_11_combined |>
# count(min_sd_match)
# dat_n_11_combined |>
# count(grim_match)
#
# dat_n_11_combined |>
# count(grimmer_match)
# dat_n_11_combined |>
# count(tides_match)
# dat_n_11_combined |>
# count(max_sd_match)
res_comparisons <- dat_n_11_combined |>
select(mean,
sd,
#n_obs,
min_sd_.sd_limits,
min_sd_new = min_sd_new_rounded,
min_sd_package,
min_sd_match,
max_sd_.sd_limits,
max_sd_new = max_sd_new_rounded,
max_sd_package,
max_sd_match,
grim.x,
#grim.y,
grimmer.x,
#grimmer.y,
tides_modified_from_.sd_limits,
tides_new,
tides_package,
tides_match) |>
filter(!is.na(min_sd_.sd_limits) |
!is.na(min_sd_new) |
!is.na(min_sd_package) |
!is.na(max_sd_.sd_limits) |
!is.na(max_sd_new) |
!is.na(max_sd_package)) |>
distinct(min_sd_.sd_limits,
min_sd_new,
min_sd_package,
max_sd_.sd_limits,
max_sd_new,
max_sd_package,
grim.x,
grimmer.x,
tides_modified_from_.sd_limits,
tides_new,
tides_package,
tides_match,
.keep_all = TRUE) |>
filter(grim.x, grimmer.x)
# no rows have:
res_comparisons |>
filter(tides_modified_from_.sd_limits == TRUE & tides_new == FALSE)## # A tibble: 0 × 16
## # ℹ 16 variables: mean <dbl>, sd <dbl>, min_sd_.sd_limits <dbl>,
## # min_sd_new <dbl>, min_sd_package <dbl>, min_sd_match <lgl>,
## # max_sd_.sd_limits <dbl>, max_sd_new <dbl>, max_sd_package <dbl>,
## # max_sd_match <lgl>, grim.x <lgl>, grimmer.x <lgl>,
## # tides_modified_from_.sd_limits <lgl>, tides_new <lgl>, tides_package <lgl>,
## # tides_match <lgl>
# many rows have:
res_comparisons |>
filter(tides_modified_from_.sd_limits == FALSE & tides_new == TRUE)## # A tibble: 35 × 16
## mean sd min_sd_.sd_limits min_sd_new min_sd_package min_sd_match
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 1.1 0.3 NA 0.3 0.3 FALSE
## 2 1.2 0.4 NA 0.4 0.4 FALSE
## 3 1.3 0.47 NA 0.47 0.47 FALSE
## 4 1.4 0.5 NA 0.5 0.5 FALSE
## 5 1.5 0.52 NA 0.52 0.52 FALSE
## 6 1.6 0.5 NA 0.5 0.5 FALSE
## 7 1.7 0.47 NA 0.47 0.47 FALSE
## 8 1.8 0.4 NA 0.4 0.4 FALSE
## 9 1.9 0.3 NA 0.3 0.3 FALSE
## 10 1.9 2.07 NA 0.3 0.3 FALSE
## # ℹ 25 more rows
## # ℹ 10 more variables: max_sd_.sd_limits <dbl>, max_sd_new <dbl>,
## # max_sd_package <dbl>, max_sd_match <lgl>, grim.x <lgl>, grimmer.x <lgl>,
## # tides_modified_from_.sd_limits <lgl>, tides_new <lgl>, tides_package <lgl>,
## # tides_match <lgl>
temp <- res_comparisons |>
filter(!is.na(min_sd_.sd_limits)) |>
distinct(min_sd_match, max_sd_match, tides_match,
.keep_all = TRUE)